import time

import requests
from lxml import etree
# HTTP request headers sent with every page fetch; only a User-Agent is set.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/122.0.0.0 Safari/537.36'
    ),
}
"""
https://duanzixing.com/page/1/
https://duanzixing.com/page/2/
"""
# Scrape four listing pages (page numbers 1 through 4) and print, for every
# <article class="excerpt">, its title text followed by its "note" paragraph text.
for i in range(1, 5):
    url = f'https://duanzixing.com/page/{i}/'
    print(url, '============================')
    # Bound the wait so a stalled connection cannot hang the script forever.
    res = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on a 4xx/5xx response instead of parsing an error page
    # as if it were a listing.
    res.raise_for_status()
    # bytes.decode() defaults to UTF-8; spelled out here for clarity.
    tree = etree.HTML(res.content.decode('utf-8'))
    article_list = tree.xpath('//article[@class="excerpt"]')
    for article in article_list:
        # xpath() returns a list, which is empty when nothing matches. Print an
        # empty line for a missing field rather than crashing with IndexError,
        # so each article still produces exactly two output lines.
        for field_path in ('./header/h2/a/text()', './p[@class="note"]/text()'):
            matches = article.xpath(field_path)
            print(matches[0] if matches else '')
    # Wait one second before requesting the next page.
    time.sleep(1)
